
/*
 * watchd.c
 *
 * daemon for Berkshire Products Serial Watchdog in Command mode.
 * 
 * Copyright (C) 2001  Workhorse Computing (lembark@wrkhors.com), GPL
 * 
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the Free 
 * Software Foundation; either version 2 of the License, or (at your option)
 * any later version.
 * 
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 
 * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * for more details.
 * 
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 675 Mass Ave, Cambridge, MA 02139, USA.
 * 
 */

/***********************************************************************
 * constants, debug_blah macros #ifdef'd  DEBUG.
 **********************************************************************/

#include "watchd.h"

/***********************************************************************
 * globals
 **********************************************************************/

/*
 * Remaining error budget for the serial link.  read_buffer() and
 * write_buffer() each decrement their own counter on a failed or
 * malformed transfer and exit the program nonzero once it hits zero.
 */

static int errs_read  = ERROR_LIMIT;
static int errs_write = ERROR_LIMIT;

/* fd for send and recv; assigned by open_fd(). */

static int fd;

/*
 * The two flags below are written from signal handlers and polled
 * by the daemon loop.  They are volatile sig_atomic_t so that the
 * stores made inside a handler are well defined and are always
 * re-read by the interrupted code.
 */

/* exit flag for daemon loop; graceful() sets it non-zero. */

static volatile sig_atomic_t goodnight = 0;

/*
 * control actions on external relay.  relayctl() stores the relay
 * command to run here; operate_relay() clears it back to zero.
 */

static volatile sig_atomic_t altrelaycmd = 0;

/***********************************************************************
 * subroutines
 **********************************************************************/

/***********************************************************************
 * low-level i/o.  read and write buffers to serial port.
 **********************************************************************/

void open_fd( char *device )
{
	extern int fd;

	/*
	 * Opens the serial device named by `device`, stores the
	 * descriptor in the file-level `fd` and programs the line for
	 * 1200 baud, 8 data bits, no parity, 2 stop bits.
	 *
	 * Any failure is logged and terminates the program with
	 * exit( -1 ); on return the port is open and configured.
	 *
	 * The previous port settings are not saved: the code assumes
	 * the watchdog is the only thing attached to this port, so
	 * nothing is restored on the way out.
	 */

	struct termios settings;

	/*
	 * Open device or die trying...
	 */

	fd = open( device, O_RDWR | O_NOCTTY | O_SYNC );

	if( fd < 0 )
	{
		syslog( LOG_DAEMON | LOG_ERR, "Exiting - cannot open device %s.", device );
		exit( -1 );
	}

	debug_str( "Opened device", device );
	debug_value( "File Descriptor", fd );

	/*
	 * Terminal setup - 1200 8N2, raw input and output, and
	 * read() returns as soon as a single byte is available.
	 */

	memset( &settings, 0, sizeof(settings) );

	settings.c_cflag      = B1200 | CS8 | CSTOPB | CLOCAL | CREAD;
	settings.c_iflag      = IGNPAR;
	settings.c_oflag      = 0;
	settings.c_lflag      = 0;
	settings.c_cc[VMIN]   = 1;
	settings.c_cc[VTIME]  = 0;

	/* throw away anything already queued on the input side. */

	tcflush( fd, TCIFLUSH );

	if( tcsetattr(fd, TCSANOW, &settings) == 0 )
	{
		syslog( LOG_DAEMON | LOG_INFO, "Successfully set %s to 1200/8/N/2." , device );
		return;
	}

	/* could not configure the line: report it and give up. */

	perror( "tcsetattr failed to configure device " );

	syslog( LOG_DAEMON | LOG_ERR, "Unable to Configure Device %s.", device );
	exit( -1 );
}

/* 
 * note: read_buffer and write_buffer log a LOG_CRIT message
 * before they exit the program, since the daemon going away
 * may cause a reboot.
 */

/*
 * Sends one five-byte command frame to the watchdog:
 *
 *     0x01 0x57 0x84 <command> <checksum>
 *
 * command: command code; only its low eight bits are sent.
 *
 * Returns whatever write() returned: the number of bytes written
 * (5 on success) or a negative value on error.  Failed and short
 * writes are logged and counted against errs_write; when that
 * budget reaches zero the program exits with status -2.
 *
 * The frame lives on the stack rather than in static storage, so a
 * call made from a signal handler cannot scribble over a frame the
 * interrupted code is still building.
 */

int write_buffer( int command )
{
	unsigned char sendbuf[5] = { 0x01, 0x57, 0x84, 0x00, 0x00 };
	unsigned int sum = 0;
	unsigned int i;
	int result;

	sendbuf[3] = (unsigned char) command;

	/* 
	 * the checksum byte is whatever value makes the
	 * entire buffer add up to 0 (modulo 256).
	 */

	for( i = 0 ; i < sizeof(sendbuf) - 1 ; ++i )
	{
		sum += sendbuf[i];
	}

	sendbuf[4] = (unsigned char)( 0u - sum );

	debug_byte( "Writing Command", (unsigned char) sendbuf[3] );
	debug_byte( "Checksum", sendbuf[4] );

	if( (result = write(fd, sendbuf, sizeof(sendbuf))) < 0 )
	{
		/*
		 * result is always -1 here, so also log the errno
		 * text (%m) to say why the write failed.
		 */

		syslog( LOG_DAEMON | LOG_WARNING, "Error Writing Watchdog: %d (%m)", result );
		--errs_write;
	}
	else if( result != (int) sizeof(sendbuf) )
	{
		syslog( LOG_DAEMON | LOG_NOTICE, "Runt Write to Watchdog" );
		--errs_write;
	}

	if( !errs_write )
	{
		/*
		 * assume that if we have this many write errors
		 * that the card is unreachable.
		 */

		syslog( LOG_DAEMON | LOG_CRIT, "Aborting:  Too Many Write Errors" );
		exit( -2 );
	}

	/*
	 * give the unit 1/10 sec to catch up after the write.
	 */

	usleep( 100000 );

	return result;
}

/*
 * Reads one four-byte reply frame from the watchdog:
 *
 *     0x57 0x84 <reply> 0x0d
 *
 * Returns the reply byte of a well-formed frame.  When the read
 * fails, comes up short or the framing bytes are wrong, the problem
 * is logged, counted against errs_read, and REPLY_INVALID is
 * returned instead of a byte from the zeroed buffer -- a zero would
 * be indistinguishable from the 0 that recv_reply() hands its
 * callers for an ACK.  When the error budget reaches zero the
 * program exits with status -3.
 */

unsigned char read_buffer( void )
{
	/*
	 * 0x18 is below 0x20, so it cannot collide with a numeric
	 * reply (those have 0x20 added), and it is none of the
	 * ACK/NAK codes (0x06, 0x15, 0x16) recv_reply() recognizes.
	 */

	enum { REPLY_INVALID = 0x18 };

	unsigned char recvbuf[4];
	int valid = 0;
	int result;

	memset( recvbuf, '\0', sizeof(recvbuf) );

	if( (result = read(fd, recvbuf, sizeof(recvbuf))) < 0 )
	{
		/* result is always -1 here; %m adds the errno text. */

		syslog( LOG_DAEMON | LOG_WARNING, "Error Reading Watchdog: %d (%m)", result );
		--errs_read;
	}
	else if( result != (int) sizeof(recvbuf) )
	{
		syslog( LOG_DAEMON | LOG_NOTICE, "Incomplete Read from Watchdog" );
		--errs_read;
	}
	else if( recvbuf[0] != 0x57 || recvbuf[1] != 0x84 || recvbuf[3] != 0x0d )
	{
		syslog( LOG_DAEMON | LOG_NOTICE, "Offball buffer returned from Watchdog" );
		--errs_read;
	}
	else
	{
		valid = 1;
	}

	debug_value( "Read Result", result );
	debug_value( "Read Error Remaining", errs_read );
	debug_byte( "Reply", recvbuf[2] );

	if( !errs_read )
	{
		/*
		 * assume that if we have this many read errors
		 * that the card is unreachable.
		 */

		syslog( LOG_DAEMON | LOG_CRIT, "Aborting:  Too Many Read Errors" );
		exit( -3 );
	}

	/*
	 * give the unit 1/10 sec to catch up after responding.
	 */

	usleep( 100000 );

	return valid ? recvbuf[2] : (unsigned char) REPLY_INVALID;
}

/***********************************************************************
 * high(er)-level i/o.  these understand and log the specific commands.
 **********************************************************************/

/* 
 * sends a single command w/ more descriptive logging.
 *
 * command: one of the command codes listed in the switch below;
 *          0x00 is accepted as an alias for NoOp (0x34).
 *
 * Unknown codes are logged and replaced with NoOp, so nothing
 * unexpected is ever sent to the card.  Nothing is returned; write
 * failures are logged and counted inside write_buffer().
 */

void send_command( int command )
{
	switch( command )
	{

		case 0x00 :
			command = 0x34;
			/* fallthrough: 0x00 is sent as NoOp */

		case 0x34 : 	debug_cmd( "NoOp",					command );	break;

		case 0x20 : 	debug_cmd( "Reset Now", 			command );	break;
		case 0x21 : 	debug_cmd( "Reset in 10 Sec",		command );	break;
		case 0x24 : 	debug_cmd( "Arm Now",				command );	break;
		case 0x28 : 	debug_cmd( "Disable",				command );	break;
		case 0x29 : 	debug_cmd( "Enable",				command );	break;
		case 0x30 : 	debug_cmd( "Query Major Rev",		command );	break;
		case 0x31 : 	debug_cmd( "Query Minor Rev",		command );	break;
		case 0x38 : 	debug_cmd( "Get Reset Count",		command );	break;
		case 0x39 : 	debug_cmd( "Clear Reset Count",		command );	break;
		case 0x50 : 	debug_cmd( "Turn On Alt. Relay",	command );	break;
		case 0x51 : 	debug_cmd( "Turn Off Alt. Relay",	command );	break;
		case 0x58 : 	debug_cmd( "Query Temprature",		command );	break;

		default:

			debug_cmd( "Bogus Command Requested, replacing with NoOp", command );
			syslog( LOG_DAEMON | LOG_ERR, "Bogus Command Requested: %d (%#02x)", command, command );

			/*
			 * substitute NoOp (0x34).  this used to assign
			 * 0x24, which is "Arm Now" -- so a bogus request
			 * armed the card instead of doing nothing.
			 */

			command = 0x34;
			break;

	}

	write_buffer( command );
}

/*
 * Fetches the next reply byte via read_buffer() and reports the
 * well-known codes.
 *
 * Returns 0 for an ACK (0x06); every other byte is handed back
 * unchanged.  numeric replys have 0x20 added to them to avoid
 * anything that looks like a control code, so callers that want the
 * number subtract 0x20 themselves.
 */

unsigned char recv_reply( void )
{
	unsigned char answer = read_buffer();

	if( answer == 0x06 )
	{
		/* the one reply that is translated: ACK becomes 0. */

		debug_msg( "Received ACK" );
		answer = 0;
	}
	else if( answer == 0x15 )
	{
		debug_msg( "Received NAK1 (Checksum Error)" );
		syslog( LOG_DAEMON | LOG_ERR, "Recv Checksum Error" );
	}
	else if( answer == 0x16 )
	{
		debug_msg( "Received NAK2 (Invalid Command)" );
		syslog( LOG_DAEMON | LOG_ERR, "Recv Invalid Command" );
	}
	else if( answer == 0x20 )
	{
		debug_msg( "Received Watchdog Armed" );
	}
	else if( answer == 0x21 )
	{
		debug_msg( "Received Watchdog Already Armed" );
	}
	else
	{
		/* anything else is shown as a number, offset removed. */

		debug_byte( "Reply Value", answer - 0x20 );
	}

	return answer;
}

/***********************************************************************
 * canned control sequences
 **********************************************************************/

/*
 * bring the unit up.
 *
 * logs the major and minor revision, the temperature and the reset
 * count (each reply has the 0x20 offset removed before logging),
 * clears the reset count, then arms and enables the watchdog and
 * logs how each of those two steps went.
 *
 * NOTE(review): the reply to WATCHDOG_CLEARRESET is never read.  If
 * the card answers that command, every later recv_reply() would see
 * the answer to the previous command -- confirm against the
 * protocol documentation before changing anything here.
 */

void init_berk( void )
{
	unsigned char answer;
	int value;

	send_command( WATCHDOG_REV_MAJOR );
	value = recv_reply() - 0x20;
	syslog( LOG_DAEMON | LOG_INFO, "Major Rev.: %d", value );

	send_command( WATCHDOG_REV_MINOR );
	value = recv_reply() - 0x20;
	syslog( LOG_DAEMON | LOG_INFO, "Minor Rev.: %d", value );

	send_command( WATCHDOG_TEMPERATURE );
	value = recv_reply() - 0x20;
	syslog( LOG_DAEMON | LOG_INFO, "Curr. Temp.: %d", value );

	send_command( WATCHDOG_RESETCOUNT );
	value = recv_reply() - 0x20;
	syslog( LOG_DAEMON | LOG_INFO, "Curr Reset Count:  %d", value );

	/*
	 * zero the reset count so the indicator light is not
	 * left on while testing.
	 */

	send_command( WATCHDOG_CLEARRESET );

	/*
	 * from here on the card can reset the machine.
	 */

	send_command( WATCHDOG_ARM );
	answer = recv_reply();

	if( answer == 0 )
	{
		syslog( LOG_DAEMON | LOG_INFO, "Watchdog Armed" );
	}
	else
	{
		syslog( LOG_DAEMON | LOG_NOTICE, "NACK Arming Watchdog: %#02x", answer );
	}

	send_command( WATCHDOG_ENABLE );
	answer = recv_reply();

	if( answer != WATCHDOG_ARMED && answer != WATCHDOG_PREARMED )
	{
		syslog( LOG_DAEMON | LOG_ERR, "Watchdog Enable Failed" );
	}
	else if( answer == WATCHDOG_ARMED )
	{
		syslog( LOG_DAEMON | LOG_INFO, "Watchdog Enabled" );
	}
	else
	{
		syslog( LOG_DAEMON | LOG_INFO, "Watchdog Was Already Enabled" );
	}

}

/*
 * shut down the unit.
 * called before graceful exit.
 * if this fails then the system has a really, really
 * good chance of going down unexpectedly.
 *
 * Returns the value of recv_reply() for the disable command:
 * zero when the card acknowledged it, non-zero otherwise.
 */

int shut_berk( void )
{
	int result;

	send_command( WATCHDOG_DISABLE );	

	if( (result = recv_reply()) )
	{
		/*
		 * this is really, really bad news...
		 *
		 * LOG_CONS is an openlog() option, not a priority
		 * bit, so it does not belong in this argument;
		 * main() already passes it to openlog().
		 */

		syslog( LOG_DAEMON | LOG_CRIT, "NACK Disabling Watchdog" );
	}
	else
	{
		syslog( LOG_DAEMON | LOG_INFO, "Watchdog Disabled" );
	}

	return result;
}

/*
 * control the alternate relay.
 * called from daemon if altrelaycmd is non-zero.
 *
 * Sends the pending relay command (WATCHDOG_RELAYOPEN or
 * WATCHDOG_RELAYCLOSE), logs the outcome and clears altrelaycmd.
 *
 * Returns zero when the reply equals WATCHDOG_ACK and non-zero
 * otherwise.  An unrecognized pending command is logged, nothing is
 * sent, and non-zero is returned; that path used to read an
 * uninitialized variable.
 *
 * NOTE(review): recv_reply() hands back 0 for an ACK.  If
 * WATCHDOG_ACK is not defined as 0 in watchd.h the comparison below
 * can never succeed -- confirm.
 */
 
int operate_relay( void )
{
	/*
	 * take one snapshot of the request: a signal may change
	 * altrelaycmd while we are working on it.
	 */

	int cmd = altrelaycmd;
	int failed = 1;
	int reply;

	switch( cmd )
	{
		case WATCHDOG_RELAYOPEN:
		case WATCHDOG_RELAYCLOSE:

			syslog( LOG_DAEMON | LOG_NOTICE, "Operating alternate relay" );

			send_command( cmd );

			if( (reply = recv_reply()) )
			{
				syslog( LOG_DAEMON | LOG_NOTICE, "NACK Operating Relay." );
			}
			else
			{
				syslog( LOG_DAEMON | LOG_INFO, "Relay Operated" );
			}

			failed = ( reply != WATCHDOG_ACK );

			break;

		default:

			syslog( LOG_DAEMON | LOG_ERR, "Offball comamnd in operate_relay: %d.", cmd );

			break;

	}

	/* regardless of what we got, reset the command. */

	altrelaycmd = 0;

	return failed;
}

/***********************************************************************
 * signal handlers
 ***********************************************************************/

/*
 * signal handler that ends daemon mode.
 * installed for INT, QUIT and TERM.
 *
 * raises the goodnight flag, which the daemon loop checks at the
 * top of every pass, and logs which signal asked for the exit.
 *
 * NOTE(review): syslog() is not on POSIX's list of
 * async-signal-safe functions; consider logging from the main loop.
 */

void graceful( int signum )
{
	goodnight = 1;

	syslog( LOG_DAEMON | LOG_NOTICE, "Exiting on Signal %d", signum );

	sleep( 0 );
}

/*
 * signal handler for the alternate relay.
 *
 * records the relay command to run in altrelaycmd: SIGUSR1 asks
 * for WATCHDOG_RELAYCLOSE, SIGUSR2 for WATCHDOG_RELAYOPEN.  any
 * other signal is logged and otherwise ignored.
 */

void relayctl( int signum )
{
	if( signum == SIGUSR1 )
	{
		altrelaycmd = WATCHDOG_RELAYCLOSE;
	}
	else if( signum == SIGUSR2 )
	{
		altrelaycmd = WATCHDOG_RELAYOPEN;
	}
	else
	{
		syslog( LOG_DAEMON | LOG_ERR, "Offball signal handled by relayctl: %d.", signum );
	}
}

/*
 * simple way to keep tabs on watchd is to kill -HUP it
 * every so often and watch the reset count in the log.
 * the time keeps syslog from suppressing duplicate messages.
 *
 * signum is unused.
 *
 * The reply byte is logged exactly as recv_reply() returns it.
 *
 * NOTE(review): this handler does serial I/O and calls syslog(),
 * neither of which is async-signal-safe, and it can interleave with
 * a command/reply exchange already running in the daemon loop.
 */

void numreset( int signum )
{
	unsigned long now;
	int resets;

	(void) signum;

	send_command( WATCHDOG_RESETCOUNT );
	resets = recv_reply();

	/*
	 * time_t is not guaranteed to match %u, so convert it
	 * explicitly; the '#' flag is undefined for %d and is gone.
	 */

	now = (unsigned long) time( (time_t *)NULL );

	syslog( LOG_DAEMON | LOG_INFO,
		"Curr. Time/Resets: %lu %02d", now, resets );
}

/***********************************************************************
 * daemon code
 ***********************************************************************/

/*
 * daemon main loop.
 *
 * closes the standard descriptors, installs the signal handlers,
 * then sends the card a NoOp about every two seconds until a
 * signal raises goodnight.  on the way out it tries to disable the
 * watchdog and logs the result.
 */

void daemonize( void )
{
	static const int exit_signals[] = { SIGINT, SIGQUIT, SIGTERM };

	time_t deadline;
	time_t now;
	unsigned int i;

	/*
	 * nothing useful to do with stdin/stdout as a daemon.
	 * stderr stays open under -DDEBUG because the debug_blah
	 * macros need it.
	 */

	close( 0 );
	close( 1 );

#ifndef DEBUG

	close( 2 );

#endif

	/*
	 * HUP logs the reset count (harmless).
	 *
	 * INT, QUIT and TERM all end the loop below so that the
	 * unit can (hopefully) be disarmed before exit.
	 *
	 * USR1 and USR2 queue a command for the alternate relay:
	 * USR1 is on, USR2 is off.
	 */

	signal( SIGHUP, numreset );

	for( i = 0 ; i < sizeof(exit_signals) / sizeof(exit_signals[0]) ; ++i )
	{
		signal( exit_signals[i], graceful );
	}

	signal( SIGUSR1, relayctl );
	signal( SIGUSR2, relayctl );

	/*
	 * a signal is the only way out of the loop after this;
	 * INT, QUIT or TERM set goodnight.  apart from that, read
	 * or write exit nonzero on too many failures.
	 *
	 * writing no-ops shouldn't cause any pain, so the reply is
	 * read only to clear the buffer and is not checked here.
	 * in -DDEBUG mode this prints send and recv info for each
	 * cycle.
	 *
	 * shortest time on the unit is 5 sec.; 2 sec should be
	 * ample time to tickle the alternate relay and ding the
	 * card w/o blowing up the server.
	 */

	goodnight = 0;
	altrelaycmd = 0;

	while( !goodnight )
	{
		deadline = time( (time_t *)NULL ) + (time_t)2;

		send_command( 0x00 );
		(void)recv_reply();

		if( altrelaycmd )
		{
			operate_relay();
		}

		/* sleep away whatever is left of this 2 sec slot. */

		if( time(&now) < deadline )
		{
			sleep( deadline - now );
		}
	}

	/* someone signalled us */

	syslog( LOG_DAEMON | LOG_NOTICE, "Disabling Watchdog" );

	if( shut_berk() )
	{
		/* the card may still be armed: very bad news. */

		syslog( LOG_DAEMON | LOG_CRIT, "NACK Disabling Watchdog On Exit" );
	}

	syslog( LOG_DAEMON | LOG_NOTICE, "Goodnight!" );

}

/***********************************************************************
 * real work starts here.
 * if DEBUG isn't defined at compile time this will fork: the parent
 * reports the child's pid on stderr and exits, the child writes the
 * pidfile, initializes the card and calls daemonize.  with -DDEBUG
 * there is no fork and the same steps run in the foreground.
 *
 * argv[1], if given, names the serial device; the default is
 * /dev/ttyS0.
 *
 * exit status: 0 normally, -4 if the fork fails.  open_fd,
 * read_buffer and write_buffer exit on their own with -1, -3 and -2.
 **********************************************************************/

int main( int argc, char **argv, char **env )
{
	char *devicename;
	pid_t pid;

	(void) env;	/* unused */

	/*
	 * Open syslog and deliver standard startup message.
	 */

	openlog( "watchd", LOG_NDELAY | LOG_CONS | LOG_PID, LOG_DAEMON );

	syslog( LOG_DAEMON | LOG_INFO, "watchd for Berkshire Products Internal Serial Watchdog." );

	/*
	 * grab the device name from the command line or
	 * default and open it.
	 */

	devicename = argc > 1 ? argv[1] : "/dev/ttyS0";

	syslog( LOG_DAEMON | LOG_INFO, "Serial Device: %s", devicename );

	debug_str( "Serial Device", devicename );

	open_fd( devicename );

#ifndef DEBUG

	/* 
	 * if we fail to fork then the card is left 
	 * unarmed (and hopefully undangerous).
	 */

	if( (pid = fork()) < 0 ) 
	{
		/*
		 * log the fact and report failure to whoever
		 * started us.  this used to fall through to
		 * exit( 0 ), which claimed a daemon was running.
		 * -4 is the next unused code after the -1/-2/-3
		 * used elsewhere in this file.
		 */

		perror( "forkaphobia" );

		syslog( LOG_DAEMON | LOG_ERR, "Failed fork, no daemon running, card inactive" );

		exit( -4 );
	}
	else if( pid )
	{
		fprintf( stderr, "\nwatchd running as %d\n", (int) pid );
	}
	else

#endif /* DEBUG */

	{

		/*
		 * regardless of DEBUG, we always run this.
		 * damonize will deal with closing fd's, etc.
		 * note that we only initialize the card if
		 * the fork succeeds or we were compiled with
		 * -DDEBUG.
		 *
		 * writing the pidfile here makes testing easier
		 * since there is always a pidfile available even
		 * in debug mode.
		 */

		FILE *pidfile;

		if( (pidfile=fopen("/var/run/watchd.pid","w")) == (FILE *)NULL )
		{
			perror( "Failed opening watchd.pid" );
			syslog( LOG_DAEMON | LOG_ERR, "Failed opening /var/run/watchd.pid." );
		}
		else
		{
			fprintf( pidfile, "%d\n", (int) getpid() );
			fclose( pidfile );
		}

		/*
		 * once we've attempted to arm the thing it's
		 * probably a good idea to keep feeding it.
		 * init_berk returns nothing to check; if it
		 * failed silently then the read/write error
		 * checks will zap us eventually anyway.
		 */

		init_berk();
		daemonize();
	}

	/*
	 * parent reaches here immediately after forking.
	 * child reaches here after calling daemonize and being signalled.
	 */

	exit( 0 );
}

/*
 * finito
 */
